package com.dmcb.trade.business.crawlers.author;

import com.dmcb.common.business.services.BaseService;
import com.dmcb.common.business.utils.DateUtil;
import com.dmcb.common.web.conversion.JsonResult;
import com.dmcb.common.business.utils.WebClientUtil;
import com.dmcb.trade.business.mappers.ArticleMonitorMapper;
import com.dmcb.trade.controller.ArticleCrawlerController;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.List;

/**
 * Crawler for the electronic edition of the Jian Kang Shi Bao newspaper
 * (jksb.com.cn).
 *
 * <p>For a given issue date it loads the issue index page, follows every
 * section link found there, and submits every article link found on each
 * section page to {@link ArticleCrawlerController#crawlMonitor}.
 *
 * <p>Originally created by 黑米 on 2017/4/6.
 */
@Service
public class JianKangShiBaoCrawler extends BaseService{

    private static final Logger logger = LoggerFactory.getLogger(JianKangShiBaoCrawler.class);

    /**
     * Root of all issue directories. A full index URL looks like
     * http://www.jksb.com.cn/newspaper/Html/2017-03-03/Qpaper.html
     */
    private static final String PAPER_ROOT_URL = "http://www.jksb.com.cn/newspaper/Html/";

    /** File name of the index page inside an issue directory. */
    private static final String INDEX_PAGE = "Qpaper.html";

    /** Copyright owner ID is hard-coded for now; replace once a better lookup exists. */
    private static final int COPYRIGHT_OWNER_ID = 3384;

    @Autowired
    private WebClientUtil webClientUtil;
    @Autowired
    private ArticleCrawlerController articleCrawlerController;
    // Injected but not used by this class at present.
    @Autowired
    private ArticleMonitorMapper articleMonitorMapper;

    /**
     * Crawls one issue of the newspaper and imports its articles.
     *
     * @param time the issue date as supplied by the caller; it is passed through
     *             {@code DateUtil.getTimeByDianZiBao} and the result is used as the
     *             issue directory name in the URL (presumably {@code yyyy-MM-dd},
     *             as in the sample URL above; confirm against DateUtil)
     * @return an error result when the index page cannot be loaded or contains no
     *         section rows; otherwise a success result whose message reports how
     *         many articles {@code crawlMonitor} imported successfully
     */
    public JsonResult crawl (String time){
        time = DateUtil.getTimeByDianZiBao(time);
        // Every page of one issue lives under the same dated directory.
        String issueBaseUrl = PAPER_ROOT_URL + time + "/";

        Document indexDoc = webClientUtil.getDoc(issueBaseUrl + INDEX_PAGE);
        if (indexDoc == null) {
            return error("解析失败或今日没有发布新闻");
        }
        Elements sectionRows = indexDoc.select("div.Paper>table>tbody>tr");
        if (sectionRows == null || sectionRows.isEmpty()) {
            return error("解析失败或今日没有发布新闻");
        }

        int importedCount = 0;
        for (String sectionHref : extractHrefs(sectionRows)) {
            // Use the requested issue date here, not a fixed one, so the section
            // pages belong to the same issue as the index page.
            String sectionUrl = resolve(issueBaseUrl, sectionHref);
            Document sectionDoc = webClientUtil.getDoc(sectionUrl);
            if (sectionDoc == null) {
                // One unreachable section must not abort the whole issue.
                logger.warn("Skipping section page that could not be loaded: {}", sectionUrl);
                continue;
            }
            Elements articleRows = sectionDoc.select("div.PaperB>table>tbody>tr");
            if (articleRows == null || articleRows.isEmpty()) {
                continue;
            }
            for (String articleHref : extractHrefs(articleRows)) {
                // Submit the article itself rather than the section page.
                // Assumes article hrefs are relative to the issue directory,
                // like the section hrefs. TODO confirm against the live markup.
                String articleUrl = resolve(issueBaseUrl, articleHref);
                JsonResult result = articleCrawlerController.crawlMonitor(COPYRIGHT_OWNER_ID, articleUrl);
                if (result != null && result.getStatus() == JsonResult.STATUS_SUCCESS) {
                    importedCount++;
                }
            }
        }
        return success("健康时报成功导入："+importedCount+"篇");
    }

    /**
     * Collects the {@code href} of the first anchor in each row, skipping rows
     * that have no anchor or an empty {@code href}.
     */
    private List<String> extractHrefs(Elements rows) {
        List<String> hrefs = new ArrayList<String>();
        for (Element row : rows) {
            String href = row.select("a").attr("href");
            if (href != null && !href.trim().isEmpty()) {
                hrefs.add(href.trim());
            }
        }
        return hrefs;
    }

    /** Returns {@code href} unchanged when it is already absolute, otherwise appends it to {@code baseUrl}. */
    private String resolve(String baseUrl, String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        return baseUrl + href;
    }

}
